# Suppress all warnings so they do not clutter the notebook output
import warnings
warnings.filterwarnings("ignore")
# Import the Python packages needed for single-cell RNA-seq analysis
import scanpy as sc #software suite of tools for single-cell analysis in python
import besca as bc #internal BEDA package for single cell analysis
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import scipy
import anndata as ad
from scipy.sparse import csr_matrix
import scanpy.external as sce
from harmony import harmonize
import umap.umap_ as umap
from scipy import io
# Scanpy log level: 0 = errors, 1 = warnings, 2 = info, 3 = hints.
sc.settings.verbosity = 3
# Show the installed anndata version.
print(ad.__version__)
# NOTE: sc.logging.print_versions() raised an error here, so it is left disabled.
INFO:torch.distributed.nn.jit.instantiator:Created a temporary directory at /tmp/tmpf68urjdb INFO:torch.distributed.nn.jit.instantiator:Writing /tmp/tmpf68urjdb/_remote_module_non_scriptable.py INFO:lightning_fabric.utilities.seed:Global seed set to 0
0.9.1
# Load the most recently saved annotated data object (h5ad format).
# The variable name `adata` is reused so it matches the earlier analysis steps.
save_file = '/home/jana/scanpy_qc_filtered_pbmcs_for_sarcoid.h5ad'
adata = sc.read_h5ad(filename=save_file)
# --- Marker-gene discovery ---
# Log-transform the data only once. scanpy records a previous log1p call in
# `adata.uns['log1p']`; when that key is present it merely warns and then
# transforms again, which would double-log the matrix (and compound on every
# re-run, because this notebook saves back to the file it loads from).
if 'log1p' in adata.uns:
    # NOTE(review): some scanpy releases look up adata.uns['log1p']['base'] in
    # rank_genes_groups, and the entry may be missing after an h5ad round-trip;
    # restoring the default (natural log) keeps that lookup safe - confirm
    # against the installed scanpy version.
    adata.uns['log1p'].setdefault('base', None)
else:
    sc.pp.log1p(adata)
# Rank genes per Leiden cluster (resolution 0.7) with the Wilcoxon rank-sum test.
sc.tl.rank_genes_groups(adata, 'leiden_0.7', method='wilcoxon')
# Plot the 25 top-scoring genes of every cluster, each panel with its own y-axis.
sc.pl.rank_genes_groups(adata, n_genes=25, sharey=False)
WARNING: adata.X seems to be already log-transformed.
ranking genes
finished: added to `.uns['rank_genes_groups']`
'names', sorted np.recarray to be indexed by group ids
'scores', sorted np.recarray to be indexed by group ids
'logfoldchanges', sorted np.recarray to be indexed by group ids
'pvals', sorted np.recarray to be indexed by group ids
'pvals_adj', sorted np.recarray to be indexed by group ids (0:05:05)
# Table of the five top-scoring differentially expressed genes per cluster
# (Leiden clustering, resolution 0.7); one column per cluster.
pd.DataFrame(adata.uns['rank_genes_groups']['names']).iloc[:5]
| 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | VCAN | RPL30 | CD79A | IL7R | RPS12 | S100A8 | CPVL | CCL5 | NKG7 | LST1 | TNFSF10 | CST3 | KLRB1 | IL32 | ITM2C | KCTD12 | PPBP | SUB1 | RPS4X | TNFRSF18 |
| 1 | LYZ | RPS3A | MS4A1 | LTB | RPS3A | S100A9 | FTH1 | NKG7 | GNLY | FCGR3A | MX1 | HLA-DRB1 | GZMK | TRAC | CCDC50 | FOS | PF4 | STMN1 | SNHG29 | KLRB1 |
| 2 | S100A9 | RPL32 | CD79B | IL32 | RPS6 | MNDA | FGL2 | IL32 | CST7 | AIF1 | PARP14 | HLA-DPA1 | KLRG1 | RTKN2 | LILRA4 | NEAT1 | NRGN | C12orf75 | NPM1 | CTSW |
| 3 | S100A8 | RPL21 | CD37 | TRAC | CD8B | S100A12 | HLA-DPA1 | CST7 | GZMA | SMIM25 | IFI44L | HLA-DQA1 | IL32 | CD3D | IRF8 | DUSP6 | GP1BB | PCLAF | RPS6 | AC004687.1 |
| 4 | CD14 | RPS15A | CD74 | TPT1 | RPL32 | VCAN | AIF1 | B2M | PRF1 | FCER1G | IFIT3 | HLA-DPB1 | GZMA | LTB | PLD4 | TNFAIP2 | CAVIN2 | PPIA | GAS5 | CD7 |
# Dot plot of the five top-ranked genes per cluster, coloured by log fold
# change and restricted to genes with a minimum log fold change of 2.
sc.pl.rank_genes_groups_dotplot(
    adata,
    n_genes=5,
    min_logfoldchange=2,
    values_to_plot='logfoldchanges',
    cmap='bwr',
    vmin=-7,
    vmax=7,
)
WARNING: dendrogram data not found (using key=dendrogram_leiden_0.7). Running `sc.tl.dendrogram` with default parameters. For fine tuning it is recommended to run `sc.tl.dendrogram` independently.
using 'X_pca' with n_pcs = 50
Storing dendrogram info using `.uns['dendrogram_leiden_0.7']`
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_dotplot.py:749: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap', 'norm' will be ignored
# Matrix plot of the five top-scoring genes per cluster, drawn from adata.X
# (not .raw) on a diverging colour scale clipped to [-3, 3].
sc.pl.rank_genes_groups_matrixplot(
    adata,
    n_genes=5,
    use_raw=False,
    cmap='bwr',
    vmin=-3,
    vmax=3,
)
# Unique genes among the five top-scoring genes of all 20 clusters
# (5 x 20 = 100 table entries, 83 distinct symbols), sorted alphabetically.
# 'AC004687.1' and 'AIF1' appear in the top-5 table but were missing from the
# front of the previously hard-coded list; they are restored here.
marker_gene_unique = [
    'AC004687.1', 'AIF1', 'B2M', 'C12orf75', 'CAVIN2',
    'CCDC50', 'CCL5', 'CD14', 'CD37', 'CD3D',
    'CD7', 'CD74', 'CD79A', 'CD79B', 'CD8B',
    'CPVL', 'CST3', 'CST7', 'CTSW', 'DUSP6',
    'FCER1G', 'FCGR3A', 'FGL2', 'FOS', 'FTH1',
    'GAS5', 'GNLY', 'GP1BB', 'GZMA', 'GZMK',
    'HLA-DPA1', 'HLA-DPB1', 'HLA-DQA1', 'HLA-DRB1',
    'IFI44L', 'IFIT3', 'IL32', 'IL7R', 'IRF8',
    'ITM2C', 'KCTD12', 'KLRB1', 'KLRG1', 'LILRA4',
    'LST1', 'LTB', 'LYZ', 'MNDA', 'MS4A1',
    'MX1', 'NEAT1', 'NKG7', 'NPM1', 'NRGN',
    'PARP14', 'PCLAF', 'PF4', 'PLD4', 'PPBP',
    'PPIA', 'PRF1', 'RPL21', 'RPL30', 'RPL32',
    'RPS12', 'RPS15A', 'RPS3A', 'RPS4X', 'RPS6',
    'RTKN2', 'S100A12', 'S100A8', 'S100A9', 'SMIM25',
    'SNHG29', 'STMN1', 'SUB1', 'TNFAIP2', 'TNFRSF18',
    'TNFSF10', 'TPT1', 'TRAC', 'VCAN',
]
# Dot plot of the unique marker genes across the Leiden (0.7) clusters,
# with clusters ordered by the dendrogram.
sc.pl.dotplot(
    adata,
    var_names=marker_gene_unique,
    groupby='leiden_0.7',
    dendrogram=True,
)
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_dotplot.py:749: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap', 'norm' will be ignored
# B cells - known marker genes: CD79A, MS4A1
sc.settings.set_figure_params(dpi=100)
sc.pl.violin(
    adata,
    keys=['CD79A', 'MS4A1'],
    groupby='leiden_0.7',
    rotation=90,
    figsize=(3,1),
    gridspec_kw={'wspace':0.8},
    alpha=0.8,
)
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/_settings.py:447: DeprecationWarning: `set_matplotlib_formats` is deprecated since IPython 7.23, directly use `matplotlib_inline.backend_inline.set_matplotlib_formats()` INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
# CD4 T cells - known marker genes: CD3D, IL7R
sc.settings.set_figure_params(dpi=100)
sc.pl.violin(
    adata,
    keys=['CD3D','IL7R'],
    groupby='leiden_0.7',
    rotation=90,
    figsize=(3,1),
    gridspec_kw={'wspace':0.8},
    alpha=0.8,
)
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/_settings.py:447: DeprecationWarning: `set_matplotlib_formats` is deprecated since IPython 7.23, directly use `matplotlib_inline.backend_inline.set_matplotlib_formats()` INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
# CD8 T cells - known marker gene: CD8A
sc.pl.violin(
    adata,
    keys=['CD8A'],
    groupby='leiden_0.7',
    rotation=90,
    figsize=(3,1),
    gridspec_kw={'wspace':0.8},
    alpha=0.8,
)
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
# CD14+ monocytes - known marker genes: CD14, LYZ
sc.settings.set_figure_params(dpi=100)
sc.pl.violin(
    adata,
    keys=['CD14','LYZ'],
    groupby='leiden_0.7',
    rotation=90,
    figsize=(3,1),
    gridspec_kw={'wspace':0.8},
    alpha=0.8,
)
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/_settings.py:447: DeprecationWarning: `set_matplotlib_formats` is deprecated since IPython 7.23, directly use `matplotlib_inline.backend_inline.set_matplotlib_formats()` INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
# FCGR3A+ monocytes - known marker genes: FCGR3A, MS4A7
sc.settings.set_figure_params(dpi=100)
sc.pl.violin(
    adata,
    keys=['FCGR3A', 'MS4A7'],
    groupby='leiden_0.7',
    rotation=90,
    figsize=(3,1),
    gridspec_kw={'wspace':0.8},
    alpha=0.8,
)
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/_settings.py:447: DeprecationWarning: `set_matplotlib_formats` is deprecated since IPython 7.23, directly use `matplotlib_inline.backend_inline.set_matplotlib_formats()` INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
# NK cells - known marker genes: GNLY, NKG7
sc.settings.set_figure_params(dpi=100)
sc.pl.violin(
    adata,
    keys=['GNLY', 'NKG7'],
    groupby='leiden_0.7',
    rotation=90,
    figsize=(3,1),
    gridspec_kw={'wspace':0.8},
    alpha=0.8,
)
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/_settings.py:447: DeprecationWarning: `set_matplotlib_formats` is deprecated since IPython 7.23, directly use `matplotlib_inline.backend_inline.set_matplotlib_formats()` INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
# Dendritic cells - known marker genes: FCER1A, CST3
sc.settings.set_figure_params(dpi=100)
sc.pl.violin(
    adata,
    keys=['FCER1A', 'CST3'],
    groupby='leiden_0.7',
    rotation=90,
    figsize=(3,1),
    gridspec_kw={'wspace':0.8},
    alpha=0.8,
)
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/_settings.py:447: DeprecationWarning: `set_matplotlib_formats` is deprecated since IPython 7.23, directly use `matplotlib_inline.backend_inline.set_matplotlib_formats()` INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
# Megakaryocytes - known marker gene: PPBP
sc.pl.violin(
    adata,
    keys=['PPBP'],
    groupby='leiden_0.7',
    rotation=90,
    figsize=(3,1),
    gridspec_kw={'wspace':0.8},
    alpha=0.8,
)
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
# Well-known PBMC marker genes, collected in one list (grouped by cell type).
popular_marker_list = [
    'CD79A', 'MS4A1',    # B cells
    'CD3D', 'IL7R',      # CD4 T cells
    'CD8A',              # CD8 T cells
    'CD14', 'LYZ',       # CD14+ monocytes
    'FCGR3A', 'MS4A7',   # FCGR3A+ monocytes
    'GNLY', 'NKG7',      # NK cells
    'FCER1A', 'CST3',    # dendritic cells
    'PPBP',              # megakaryocytes
]
# Dot plot of the popular marker genes across the Leiden (0.7) clusters,
# with clusters ordered by the dendrogram.
sc.pl.dotplot(
    adata,
    var_names=popular_marker_list,
    groupby='leiden_0.7',
    dendrogram=True,
)
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_dotplot.py:749: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap', 'norm' will be ignored
# After inspecting the violin plots of the marker genes, summarise the manual
# annotation of every cluster in a temporary table.
from tabulate import tabulate
table = [
['Cluster no', 'Markers','Type of Cell'],
['0', 'CD14, LYZ','CD14+ Monocytes'],
['1', 'CD3D, IL7R','CD4 T-cell'],
['2', 'CD79A, MS4A1','B-cell'],
['3', 'CD3D, IL7R','CD4 T-cell'],
['4', 'CD3D, IL7R, CD8A','CD4 T-cell and CD8 T cell'],
['5', 'CD14, LYZ','CD14+ Monocytes'],
['6', 'CD14, LYZ','CD14+ Monocytes'],
['7', 'CD3D, IL7R, CD8A, GNLY, NKG7' ,'CD4 T-cell, CD8 T cell and NK cells'],
['8', 'FCGR3A, MS4A7','FCGR3A+ Monocytes'],
['9', 'FCGR3A, MS4A7','FCGR3A+ Monocytes'],
['10', 'CD14, LYZ','CD14+ Monocytes'],
['11', 'FCER1A, CST3','Dendritic Cells'],
['12', 'CD3D, IL7R, GNLY, NKG7','CD4 T-cell and NK cells'],
['13', 'CD3D, IL7R','CD4 T-cell'],
['14', 'FCER1A, CST3','Dendritic Cells'],
['15', 'CD14, LYZ','CD14+ Monocytes'],
['16', 'PPBP','Megakaryocytes cells'],
['17', 'CD3D, IL7R, GNLY, NKG7','CD4 T-cell and NK cells'],
['18', 'CD3D, IL7R, CD8A','CD4 T-cell and CD8 T cell'],
['19', 'CD14,LYZ, GNLY, NKG7','CD14+ Monocytes and NK cells'],
]
# Render the first row as column headers; without `headers` tabulate prints it
# as just another data row, so the table had no visible header separator.
print(tabulate(table, headers='firstrow'))
---------- ---------------------------- ----------------------------------- Cluster no Markers Type of Cell 0 CD14, LYZ CD14+ Monocytes 1 CD3D, IL7R CD4 T-cell 2 CD79A, MS4A1 B-cell 3 CD3D, IL7R CD4 T-cell 4 CD3D, IL7R, CD8A CD4 T-cell and CD8 T cell 5 CD14, LYZ CD14+ Monocytes 6 CD14, LYZ CD14+ Monocytes 7 CD3D, IL7R, CD8A, GNLY, NKG7 CD4 T-cell, CD8 T cell and NK cells 8 FCGR3A, MS4A7 FCGR3A+ Monocytes 9 FCGR3A, MS4A7 FCGR3A+ Monocytes 10 CD14, LYZ CD14+ Monocytes 11 FCER1A, CST3 Dendritic Cells 12 CD3D, IL7R, GNLY, NKG7 CD4 T-cell and NK cells 13 CD3D, IL7R CD4 T-cell 14 FCER1A, CST3 Dendritic Cells 15 CD14, LYZ CD14+ Monocytes 16 PPBP Megakaryocytes cells 17 CD3D, IL7R, GNLY, NKG7 CD4 T-cell and NK cells 18 CD3D, IL7R, CD8A CD4 T-cell and CD8 T cell 19 CD14,LYZ, GNLY, NKG7 CD14+ Monocytes and NK cells ---------- ---------------------------- -----------------------------------
# Azimuth level-2 marker genes, keyed by cell-type label.
# NOTE(review): 'Plasmablast' (9 genes) and 'Eryth' (8 genes) are shorter than
# the other 10-gene entries - confirm the omissions are intentional.
azimuth_default_markers = {
    # B-cell lineage
    'B_interm': ['MS4A1', 'TNFRSF13B', 'IGHM', 'IGHD', 'AIM2', 'CD79A', 'LINC01857', 'RALGPS2', 'BANK1', 'CD79B'],
    'B_memory': ['MS4A1', 'COCH', 'AIM2', 'BANK1', 'SSPN', 'CD79A', 'TEX9', 'RALGPS2', 'TNFRSF13C', 'LINC01781'],
    'B_naive': ['IGHM', 'IGHD', 'CD79A', 'IL4R', 'MS4A1', 'CXCR4', 'BTG1', 'TCL1A', 'CD79B', 'YBX3'],
    'Plasmablast': ['IGHA2', 'MZB1', 'TNFRSF17', 'DERL3', 'TXNDC5', 'TNFRSF13B', 'POU2AF1', 'CPNE5', 'NT5DC2'],
    # CD4 T cells
    'CD4_CTL': ['GZMH', 'CD4', 'FGFBP2', 'ITGB1', 'GZMA', 'CST7', 'GNLY', 'B2M', 'IL32', 'NKG7'],
    'CD4_Naive': ['TCF7', 'CD4', 'CCR7', 'IL7R', 'FHIT', 'LEF1', 'MAL', 'NOSIP', 'LDHB', 'PIK3IP1'],
    'CD4_Prolif': ['MKI67', 'TOP2A', 'PCLAF', 'CENPF', 'TYMS', 'NUSAP1', 'ASPM', 'PTTG1', 'TPX2', 'RRM2'],
    'CD4_TCM': ['IL7R', 'TMSB10', 'CD4', 'ITGB1', 'LTB', 'TRAC', 'AQP3', 'LDHB', 'IL32', 'MAL'],
    'CD4_TEM': ['IL7R', 'CCL5', 'FYB1', 'GZMK', 'IL32', 'GZMA', 'KLRB1', 'TRAC', 'LTB', 'AQP3'],
    'Treg': ['RTKN2', 'FOXP3', 'AC133644.2', 'CD4', 'IL2RA', 'TIGIT', 'CTLA4', 'FCRL3', 'LAIR2', 'IKZF2'],
    # CD8 T cells
    'CD8_Naive': ['CD8B', 'S100B', 'CCR7', 'RGS10', 'NOSIP', 'LINC02446', 'LEF1', 'CRTAM', 'CD8A', 'OXNAD1'],
    'CD8_Prolif': ['MKI67', 'CD8B', 'TYMS', 'TRAC', 'PCLAF', 'CD3D', 'CLSPN', 'CD3G', 'TK1', 'RRM2'],
    'CD8_TCM': ['CD8B', 'ANXA1', 'CD8A', 'KRT1', 'LINC02446', 'YBX3', 'IL7R', 'TRAC', 'NELL2', 'LDHB'],
    'CD8_TEM': ['CCL5', 'GZMH', 'CD8A', 'TRAC', 'KLRD1', 'NKG7', 'GZMK', 'CST7', 'CD8B', 'TRGC2'],
    # Dendritic cells
    'ASDC': ['PPP1R14A', 'LILRA4', 'AXL', 'IL3RA', 'SCT', 'SCN9A', 'LGMN', 'DNASE1L3', 'CLEC4C', 'GAS6'],
    'cDC1': ['CLEC9A', 'DNASE1L3', 'C1orf54', 'IDO1', 'CLNK', 'CADM1', 'FLT3', 'ENPP1', 'XCR1', 'NDRG2'],
    'cDC2': ['FCER1A', 'HLA-DQA1', 'CLEC10A', 'CD1C', 'ENHO', 'PLD4', 'GSN', 'SLC38A1', 'NDRG2', 'AFF3'],
    'pDC': ['ITM2C', 'PLD4', 'SERPINF1', 'LILRA4', 'IL3RA', 'TPM2', 'MZB1', 'SPIB', 'IRF4', 'SMPD3'],
    # Monocytes
    'CD14_Mono': ['S100A9', 'CTSS', 'S100A8', 'LYZ', 'VCAN', 'S100A12', 'IL1B', 'CD14', 'G0S2', 'FCN1'],
    'CD16_Mono': ['CDKN1C', 'FCGR3A', 'PTPRC', 'LST1', 'IER5', 'MS4A7', 'RHOC', 'IFITM3', 'AIF1', 'HES4'],
    # NK cells
    'NK': ['GNLY', 'TYROBP', 'NKG7', 'FCER1G', 'GZMB', 'TRDC', 'PRF1', 'FGFBP2', 'SPON2', 'KLRF1'],
    'NK_Prolif': ['MKI67', 'KLRF1', 'TYMS', 'TRDC', 'TOP2A', 'FCER1G', 'PCLAF', 'CD247', 'CLSPN', 'ASPM'],
    'NK_CD56br': ['XCL2', 'FCER1G', 'SPINK2', 'TRDC', 'KLRC1', 'XCL1', 'SPTSSB', 'PPP1R9A', 'NCAM1', 'TNFRSF11A'],
    # Other populations
    'Eryth': ['AHSP', 'ALAS2', 'CA1', 'SLC4A1', 'IFIT1B', 'TRIM58', 'SELENBP1', 'TMCC2'],
    'HSPC': ['SPINK2', 'PRSS57', 'CYTL1', 'EGFL7', 'GATA2', 'CD34', 'SMIM24', 'AVP', 'MYB', 'LAPTM4B'],
    'ILC': ['KIT', 'TRDC', 'TTLL10', 'LINC01229', 'SOX4', 'KLRB1', 'TNFRSF18', 'TNFRSF4', 'IL1R1', 'HPGDS'],
    'Platelet': ['PPBP', 'PF4', 'NRGN', 'GNG11', 'CAVIN2', 'TUBB1', 'CLU', 'HIST1H2AC', 'RGS18', 'GP9'],
    # Other T-cell subsets
    'dnT': ['PTPN3', 'MIR4422HG', 'NUCB2', 'CAV1', 'DTHD1', 'GZMA', 'MYB', 'FXYD2', 'GZMK', 'AC004585.1'],
    'gdT': ['TRDC', 'TRGC1', 'TRGC2', 'KLRC1', 'NKG7', 'TRDV2', 'CD7', 'TRGV9', 'KLRD1', 'KLRG1'],
    'MAIT': ['KLRB1', 'NKG7', 'GZMK', 'IL7R', 'SLC4A10', 'GZMA', 'CXCR6', 'PRSS35', 'RBM24', 'NCR3'],
}
# Azimuth marker genes, alternative representation: one list per cell type,
# ten genes each.
B_interm = ['MS4A1', 'TNFRSF13B', 'IGHM', 'IGHD', 'AIM2', 'CD79A', 'LINC01857', 'RALGPS2', 'BANK1', 'CD79B']
B_memory = ['MS4A1','COCH', 'AIM2', 'BANK1', 'SSPN', 'CD79A', 'TEX9', 'RALGPS2', 'TNFRSF13C', 'LINC01781']
B_naive = ['IGHM', 'IGHD', 'CD79A', 'IL4R', 'MS4A1', 'CXCR4', 'BTG1', 'TCL1A', 'CD79B', 'YBX3']
# The last entry used to read 'HNT5DC2', which is not a gene symbol and left
# this list one gene short; it looks like 'HRASLS2' and 'NT5DC2' fused together.
# NOTE(review): 'HRASLS2' is restored from the Azimuth PBMC marker table - verify.
Plasmablast = ['IGHA2', 'MZB1', 'TNFRSF17', 'DERL3', 'TXNDC5', 'TNFRSF13B', 'POU2AF1', 'CPNE5', 'HRASLS2', 'NT5DC2']
CD4_CTL = ['GZMH', 'CD4', 'FGFBP2', 'ITGB1', 'GZMA', 'CST7', 'GNLY', 'B2M', 'IL32', 'NKG7']
CD4_Naive = ['TCF7', 'CD4', 'CCR7', 'IL7R', 'FHIT', 'LEF1', 'MAL', 'NOSIP', 'LDHB', 'PIK3IP1']
CD4_Prolif = ['MKI67', 'TOP2A', 'PCLAF', 'CENPF', 'TYMS', 'NUSAP1', 'ASPM', 'PTTG1', 'TPX2', 'RRM2']
CD4_TCM = ['IL7R', 'TMSB10', 'CD4', 'ITGB1', 'LTB', 'TRAC', 'AQP3', 'LDHB', 'IL32', 'MAL']
CD4_TEM = ['IL7R', 'CCL5', 'FYB1', 'GZMK', 'IL32', 'GZMA', 'KLRB1', 'TRAC', 'LTB', 'AQP3']
Treg = ['RTKN2', 'FOXP3', 'AC133644.2', 'CD4', 'IL2RA', 'TIGIT', 'CTLA4', 'FCRL3', 'LAIR2', 'IKZF2']
CD8_Naive = ['CD8B', 'S100B', 'CCR7', 'RGS10', 'NOSIP', 'LINC02446', 'LEF1', 'CRTAM', 'CD8A', 'OXNAD1']
CD8_Prolif = ['MKI67', 'CD8B', 'TYMS', 'TRAC', 'PCLAF', 'CD3D', 'CLSPN', 'CD3G', 'TK1', 'RRM2']
CD8_TCM = ['CD8B', 'ANXA1', 'CD8A', 'KRT1', 'LINC02446', 'YBX3', 'IL7R', 'TRAC', 'NELL2', 'LDHB']
CD8_TEM = ['CCL5', 'GZMH', 'CD8A', 'TRAC', 'KLRD1', 'NKG7', 'GZMK', 'CST7', 'CD8B', 'TRGC2']
ASDC = ['PPP1R14A', 'LILRA4', 'AXL', 'IL3RA', 'SCT', 'SCN9A', 'LGMN', 'DNASE1L3', 'CLEC4C', 'GAS6']
cDC1 = ['CLEC9A', 'DNASE1L3', 'C1orf54', 'IDO1', 'CLNK', 'CADM1', 'FLT3', 'ENPP1', 'XCR1', 'NDRG2']
cDC2 = ['FCER1A', 'HLA-DQA1', 'CLEC10A', 'CD1C', 'ENHO','PLD4', 'GSN', 'SLC38A1', 'NDRG2', 'AFF3']
pDC = ['ITM2C', 'PLD4', 'SERPINF1', 'LILRA4', 'IL3RA', 'TPM2', 'MZB1', 'SPIB', 'IRF4', 'SMPD3']
CD14_Mono = ['S100A9', 'CTSS', 'S100A8', 'LYZ', 'VCAN', 'S100A12', 'IL1B', 'CD14', 'G0S2', 'FCN1']
CD16_Mono = ['CDKN1C', 'FCGR3A', 'PTPRC', 'LST1', 'IER5', 'MS4A7', 'RHOC', 'IFITM3', 'AIF1', 'HES4']
NK = ['GNLY', 'TYROBP', 'NKG7', 'FCER1G', 'GZMB', 'TRDC', 'PRF1', 'FGFBP2', 'SPON2', 'KLRF1']
NK_Prolif = ['MKI67', 'KLRF1', 'TYMS', 'TRDC', 'TOP2A', 'FCER1G', 'PCLAF', 'CD247', 'CLSPN', 'ASPM']
NK_CD56br = ['XCL2', 'FCER1G', 'SPINK2', 'TRDC', 'KLRC1', 'XCL1', 'SPTSSB', 'PPP1R9A', 'NCAM1', 'TNFRSF11A']
Eryth = ['HBD', 'HBM', 'AHSP', 'ALAS2', 'CA1', 'SLC4A1', 'IFIT1B', 'TRIM58', 'SELENBP1', 'TMCC2']
HSPC = ['SPINK2', 'PRSS57', 'CYTL1', 'EGFL7', 'GATA2', 'CD34', 'SMIM24', 'AVP', 'MYB', 'LAPTM4B']
ILC = ['KIT', 'TRDC', 'TTLL10', 'LINC01229', 'SOX4', 'KLRB1', 'TNFRSF18', 'TNFRSF4', 'IL1R1', 'HPGDS']
Platelet = ['PPBP', 'PF4', 'NRGN', 'GNG11', 'CAVIN2', 'TUBB1', 'CLU', 'HIST1H2AC', 'RGS18', 'GP9']
dnT = ['PTPN3', 'MIR4422HG', 'NUCB2', 'CAV1', 'DTHD1', 'GZMA', 'MYB', 'FXYD2', 'GZMK', 'AC004585.1']
gdT = ['TRDC', 'TRGC1', 'TRGC2', 'KLRC1', 'NKG7', 'TRDV2', 'CD7', 'TRGV9', 'KLRD1', 'KLRG1']
MAIT = ['KLRB1', 'NKG7', 'GZMK', 'IL7R', 'SLC4A10', 'GZMA', 'CXCR6', 'PRSS35', 'RBM24', 'NCR3']
# Dot plot of the Azimuth marker genes (grouped by cell-type label) across the
# Leiden (0.7) clusters, at a reduced figure resolution.
sc.settings.set_figure_params(dpi=70)
sc.pl.dotplot(
    adata,
    var_names=azimuth_default_markers,
    groupby='leiden_0.7',
    dendrogram=True,
)
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/_settings.py:447: DeprecationWarning: `set_matplotlib_formats` is deprecated since IPython 7.23, directly use `matplotlib_inline.backend_inline.set_matplotlib_formats()`
WARNING: Groups are not reordered because the `groupby` categories and the `var_group_labels` are different. categories: 0, 1, 2, etc. var_group_labels: B_interm, B_memory, B_naive, etc.
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_dotplot.py:749: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap', 'norm' will be ignored
# Initial annotation: one label per Leiden (0.7) cluster; list position = cluster id.
new_cluster_names = [
    # clusters 0-4
    'CD14+ Mono', 'Naive CD4T', 'B cell', 'CD4 T', 'Naive CD8',
    # clusters 5-9
    'CD14+ Mono', 'CD14+ Mono', 'Mixed', 'NK', 'CD16 Mono',
    # clusters 10-14
    'CD14+ Mono', 'DC', 'CD8M', 'Tregs', 'DC',
    # clusters 15-19
    'Monocytes', 'Platelets', 'Mixed', 'CD8T', 'Mixed NK',
]
# Attach the labels to the cells, based on their 'leiden_0.7' cluster, under
# the new annotation label 'initial_annotation'.
bc.tl.annotate_cells_clustering(
    adata=adata,
    new_cluster_labels=new_cluster_names,
    new_annotation_label='initial_annotation',
    clustering_label='leiden_0.7',
)
# Side-by-side UMAPs: the original Leiden (0.7) clusters and their initial
# annotation, with the labels drawn on the embedding itself.
sc.pl.umap(
    adata,
    color=['leiden_0.7', 'initial_annotation'],
    legend_loc="on data",
    wspace=0.1,
)
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:163: MatplotlibDeprecationWarning: The get_cmap function was deprecated in Matplotlib 3.7 and will be removed two minor releases later. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap(obj)`` instead. /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
# Write the annotated object to disk in h5ad format.
# (scipy.io is imported here as in the original cell; nothing in this cell uses it.)
from scipy import io
save_file = '/home/jana/scanpy_qc_filtered_pbmcs_for_sarcoid.h5ad'
adata.write_h5ad(filename=save_file)